# load libraries
pacman::p_load(readxl, car, jtools, tidyverse)

# Read cells L23:Q498 of the 'z_t_test' sheet from the workbook
data <- read_excel(
  "DataSets.xlsx",
  sheet = "z_t_test",
  range = "L23:Q498"
)

# Quick look at the raw data: first rows, column names, column types
head(data)
names(data)
str(data)

# Treat the grouping columns as categorical (factor) variables
fact_var <- c("Sex", "Breast_feeding")
data <- dplyr::mutate(
  data,
  dplyr::across(dplyr::all_of(fact_var), as.factor)
)
str(data)

# Two-sample t-test of Weight by Sex
# H0: the mean weights of baby girls and baby boys are equal
# var.equal = TRUE requests the pooled-variance (Student) form of the test
t.test(
  Weight ~ Sex,
  data = data,
  var.equal = TRUE
)

# Recorded result: p < 0.05, so H0 is rejected
# The weights of baby boys and baby girls differ

# Assumption checks for the Weight ~ Sex comparison
# 1. Normality: box plot and Q-Q plot of Weight split by Sex
boxplot(Weight ~ Sex, data = data)
qqPlot(Weight ~ Sex, data = data)

# 2. Equality of variance: Levene's test (H0: the group variances are equal)
leveneTest(Weight ~ Sex, data = data)
# Recorded result: p > 0.05, so H0 of equal variances is not rejected

# Summary of data
summary(data)

# Group-wise mean and standard deviation of Weight for each Sex.
# na.rm = TRUE keeps one missing Weight from turning a whole group's
# mean/sd into NA; t.test() drops missing values by default, so these
# descriptives now describe the same observations as the test.
library(dplyr)
data %>%
  group_by(Sex) %>%
  summarise(
    mean = mean(Weight, na.rm = TRUE),
    sd = sd(Weight, na.rm = TRUE)
  )

# Q. Check whether the heights of the breast-fed babies differ from those of
# the non-breast-fed babies.
summary(data$Breast_feeding)

# Recode the value '2' as '1'.
# The column is already a factor at this point, so the two levels are merged
# in place instead of re-reading the workbook and repeating the factor
# conversion. as.factor() is a no-op on a factor; it only guards the case
# where this section is run on a freshly loaded, unconverted `data`.
data$Breast_feeding <- as.factor(data$Breast_feeding)
bf_levels <- levels(data$Breast_feeding)
bf_levels[bf_levels == "2"] <- "1"
# Assigning a levels vector with duplicates merges the duplicated levels
levels(data$Breast_feeding) <- bf_levels
str(data)

# Confirm the recode: counts per level and the remaining level labels
summary(data$Breast_feeding)
levels(data$Breast_feeding)

# Student's t-test of Height by Breast_feeding (pooled variance)
t.test(
  Height ~ Breast_feeding,
  data = data,
  var.equal = TRUE
)

# Welch's t-test: same comparison without assuming equal variances
t.test(
  Height ~ Breast_feeding,
  data = data,
  var.equal = FALSE
)

# Wilcoxon rank-sum test: non-parametric alternative for the same comparison
wilcox.test(Height ~ Breast_feeding, data = data)

# Paired t-test
# Read cells C31:E61 of the 'z_t_test' sheet into `diet`
diet <- read_excel(
  "DataSets.xlsx",
  sheet = "z_t_test",
  range = "C31:E61"
)

# Run the paired t-test on the `before` and `after` columns.
# var.equal is intentionally not passed: it only affects the unpaired
# two-sample test and has no effect when paired = TRUE.
t.test(diet$before, diet$after, paired = TRUE)

# Means of each column, to read the direction of the change
mean(diet$before)
mean(diet$after)
